In [35]:
import string
import scipy
import Tkinter, tkFileDialog
import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import sys
import glob
import re
import cPickle
import pdb
sys.path.append(os.path.abspath("C:\Users\Scherer Lab E\Documents\GitHub\Python_Data_Analysis"))
import common_functions
%matplotlib notebook
In [36]:
pd.options.io.hdf.default_format = 'table'

Prepare the HDF5 File

In [37]:
cd "K:\Pat's_Projects\ParticleTrajectoryData"
K:\Pat's_Projects\ParticleTrajectoryData
In [38]:
store = pd.HDFStore('driven_optical_matter.h5', mode='w')

Function for adding DataFrames and MetaData

In [39]:
def add_list_of_dfs_to_hdf(hdf_obj, dfs_list, experiment_name, movie_names_list, selector_metadata, individual_metadata=None):
    '''A function for adding DataFrames to an HDF5 file and making entries
    to the index table used to select based on metadata
    
    :param hdf_obj: an HDFStore pandas object to store the data in
    :param dfs_list: A list of the data frames you want to add to the HDF file
    :param (str) experiment_name: A string to tag the experiment that all 
    DataFrames in dfs_list falls under
    :param (list) movie_names_list: A list of strings to name each of the 
    DataFrames by in dfs_list. Must be the same length as dfs_list
    :param (dict) selector_metadata: A dictionary of keys and values to describe
    the data that will be put in the indexer table. They can contain single
    values or a list of length equal to dfs_list.
    '''
    selector_df = pd.DataFrame(selector_metadata)
    #selector_df['mov_index'] = selector_df.index.values+ 1 
    for num,i in enumerate(dfs_list):
        movie_name = re.search('(Mov_[0123456789]{8})',movie_names_list[num])
        key = experiment_name+'/'+movie_name.groups()[0]
        selector_df.loc[num, 'mov_index'] = int(movie_name.groups()[0][-2:])
        selector_df.loc[num, 'key'] = key
        hdf_obj.put(key, i)
    # Pandas will make the mov_index column Floats because appending will make it
    # have NaNs.
    selector_df.loc[:,'mov_index'] = selector_df.loc[:,'mov_index'].astype('float64')
    try:
        hdf_obj.get('index')
    except KeyError:
        hdf_obj.put('index', selector_df)
    if selector_df.isin(hdf_obj.index.reset_index()).all().all() == True:
        print "Values already in index!"
        return
    elif selector_df.isin(hdf_obj.index.reset_index()).any().any() == False:
        print "Some values of the index match while others don't"
        print selector_df[selector_df.isin(hdf_obj.index)]
        return 
    else:
        hdf_obj.append('index', selector_df)
        hdf_obj.put('index', hdf_obj.index.reset_index(drop=True))
        return

Exp01161501

In [40]:
trajectory_dir = "C:\Users\Scherer Lab E\Downloads\TrackingGUI_and_associated_files_20July2014 My Version\\"
movie_list = glob.glob(trajectory_dir+"Mov_011615*linked.pandas")
data_list = []
for i in movie_list:
    data_list.append(pd.read_pickle(i))
In [41]:
'''Get just the file names'''
file_list = [os.path.split(i)[1] for i in movie_list]

Metadata for Exp01161501

In [42]:
meta_data = {}
meta_data['L'] = [1,0,2,2,3,3,4,4,5,0,0]
meta_data['substrate'] = 'glass'
meta_data['experiment'] = 'Exp01161501'
meta_data['mag_slider'] = 1.6
meta_data['date'] = pd.datetime(2015,1,16)
In [43]:
for num, i in enumerate(data_list):
    #del i['nn_dist']
    #del i['nn_part']
    temp_df = i.groupby('frame', group_keys=False).apply(common_functions.find_nn_ver_2).reset_index()
    data_list[num] = temp_df[['frame','track id','x pos','y pos','nn_num','nn_id','nn_dist','theta','r']]
In [44]:
%pdb off
Automatic pdb calling has been turned OFF
In [45]:
add_list_of_dfs_to_hdf(store, data_list, 'Exp01161501', file_list, meta_data)
Values already in index!

Exp02101501

In [46]:
trajectory_dir = "C:\Users\Scherer Lab E\Downloads\TrackingGUI_and_associated_files_20July2014 My Version\\"
movie_list = glob.glob(trajectory_dir+"Mov_021015*linked.pandas")
data_list = []
for i in movie_list:
    data_list.append(pd.read_pickle(i))
In [47]:
'''Get just the file names'''
file_list = [os.path.split(i)[1] for i in movie_list]

Metadata for Exp02101501

In [48]:
meta_data = {}
meta_data['L'] = [0,2,1,1,2,2,3,4,4,4,4,4,5]
meta_data['substrate'] = 'plate'
meta_data['experiment'] = 'Exp02101501'
meta_data['mag_slider'] = 1.0
meta_data['date'] = pd.datetime(2015,2,20)
In [49]:
for num, i in enumerate(data_list):
    #del i['nn_dist']
    #del i['nn_part']
    temp_df = i.groupby('frame', group_keys=False).apply(common_functions.find_nn_ver_2).reset_index()
    data_list[num] = temp_df[['frame','track id','x pos','y pos','nn_num','nn_id','nn_dist','theta','r']]
In [50]:
%pdb off
Automatic pdb calling has been turned OFF
In [51]:
add_list_of_dfs_to_hdf(store, data_list, 'Exp02101501', file_list, meta_data)

Exp11201301 and 12031301

In [52]:
data_dir="J:\Pat's Projects\Dynamical Phase Transition\Mosaic Trajectories"
fit_params=cPickle.load(open(data_dir+'circle_fitting_params_082214.pkl','r'))
expt_data=cPickle.load(open(data_dir+'data_frames_082214.pkl','r'))
expt_list=cPickle.load(open(data_dir+'expt_list_082214.pkl','r'))
In [53]:
for num, i in enumerate(expt_data):
    del i['nn_dist']
    del i['nn_part']
    temp_df = i.groupby('frame', group_keys=False).apply(common_functions.find_nn_ver_2).reset_index()
    expt_data[num] = temp_df[['frame','track id','x pos','y pos','nn_num','nn_id','nn_dist','theta','r']]
In [54]:
len(expt_data)
Out[54]:
17

Exp11201301

In [55]:
expt_list_11 = expt_list[:5]
expt_data_11 = expt_data[:5]
In [56]:
meta_data = {}
meta_data['L'] = [3,3,2,1,5]
meta_data['substrate'] = 'plate'
meta_data['experiment'] = 'Exp11201301'
meta_data['mag_slider'] = 1.6
meta_data['date'] = pd.datetime(2013,11,20)
In [57]:
add_list_of_dfs_to_hdf(store, expt_data_11, 'Exp11201301', expt_list_11, meta_data)

Exp12031301

In [58]:
expt_list_12 = expt_list[5:]
expt_data_12 = expt_data[5:]
In [59]:
len(expt_data_12)
Out[59]:
12
In [60]:
meta_data = {}
meta_data['L'] = [0,1,2,3,4,5,6,1,2,3,3,4]
meta_data['substrate'] = ['glass']*7 + ['plate']*5
meta_data['experiment'] = 'Exp12031301'
meta_data['mag_slider'] = 1.6
meta_data['date'] = pd.datetime(2013,12,03)
In [61]:
add_list_of_dfs_to_hdf(store, expt_data_12, 'Exp12031301', expt_list_12, meta_data)

Exp01151401

In [62]:
data_dir="J:\Pat's Projects\Dynamical Phase Transition\Mosaic Trajectories"
fit_params=cPickle.load(open(data_dir+'circle_fitting_params_081314.pkl','r'))
expt_data=cPickle.load(open(data_dir+'data_frames_081314.pkl','r'))
expt_list=cPickle.load(open(data_dir+'expt_list_081314.pkl','r'))
In [63]:
for num, i in enumerate(expt_data):
    del i['nn_dist']
    del i['nn_part']
    expt_data[num] = i
In [64]:
temp_df = expt_data[2].groupby('frame', group_keys=False).apply(common_functions.find_nn_ver_2).reset_index()
In [65]:
for num, i in enumerate(expt_data):
    #del i['nn_dist']
    #del i['nn_part']
    temp_df = expt_data[num].groupby('frame', group_keys=False).apply(common_functions.find_nn_ver_2).reset_index()
    #print expt_list[num]
    try:
        expt_data[num] = temp_df[['frame','track id','x pos','y pos','nn_num','nn_id','nn_dist','theta','r']]
    except KeyError:
        temp_df['nn_num'] = np.nan
        temp_df['nn_id'] = np.nan
        temp_df['nn_dist'] = np.nan
        expt_data[num] = temp_df[['frame','track id','x pos','y pos','nn_num','nn_id','nn_dist','theta','r']]
In [66]:
meta_data = {}
meta_data['L'] = [1,1,2,2,3,3,3,3,4,4,4,5,5,5,1,1,1,2,2,0,2,3,3,3,4,4,4,5,5,5,5,10]
meta_data['substrate'] = ['glass']*14 + ['plate']*17 + ['glass']
meta_data['experiment'] = 'Exp01151401'
meta_data['mag_slider'] = 1.6
meta_data['date'] = pd.datetime(2014,01,15)
In [67]:
add_list_of_dfs_to_hdf(store, expt_data, 'Exp01151401', expt_list, meta_data)
In [68]:
store.close()
In [ ]: